import pandas as pd 
import numpy as np
import re
from sklearn import datasets, linear_model
from sklearn.feature_selection import f_regression, SelectFdr
from statsmodels.sandbox.stats.multicomp import multipletests
import networkx as nx

# LOADING mouse data

def loadMouseDEGs(fdrThresh, mouseDat_all=None, mouse2Human=None):
	"""Collect significant up-/down-regulated genes per sheet, mapped to human IDs.

	Within every sheet, the columns whose name contains ``FDR`` are paired by
	position with the columns whose name contains ``log2FoldChange``. Rows
	with an FDR below ``fdrThresh`` are split by the sign of the fold change,
	and their ``Entrez`` values are translated through the ortholog table
	(``Gene ID`` -> first ``;``-separated entry of ``Gene ID.1``). Genes
	without an entry in that table are dropped.

	Args:
		fdrThresh: Exclusive upper bound on the FDR for a gene to be kept.
		mouseDat_all: Optional dict mapping sheet name -> DataFrame. When
			None, every sheet of the default workbook is read.
		mouse2Human: Optional ortholog DataFrame with ``Gene ID`` and
			``Gene ID.1`` columns. When None, the default table is read.

	Returns:
		dict mapping each sheet name to a ``(up, down)`` tuple. Both members
		are dicts keyed by contrast name (the FDR column name without its
		leading ``"FDR."``) holding a list of human IDs. A contrast with no
		positional fold-change partner keeps the value None.
	"""
	if mouseDat_all is None:
		homedir = "/home/andrew/"
		# sheet_name=None returns a dict of all sheets; the older
		# ``sheetname`` spelling is not accepted by current pandas.
		mouseDat_all = pd.read_excel(homedir + "paper1/YangSup/nn.4256-S3.xlsx", sheet_name = None)
	if mouse2Human is None:
		mouse2Human = pd.read_csv("/home/andrew/extData/hdinhd/orthoConv2.txt", sep = "\t")

	# Build the mouse -> human lookup. The converted IDs are kept in their own
	# Series instead of being written back onto the dropna() result, which
	# avoids pandas' chained-assignment warning and leaves the input untouched.
	orthologs = mouse2Human.dropna(subset = ['Gene ID.1'])
	humanIds = orthologs['Gene ID.1'].apply(lambda x: int(x.split(';')[0]))
	convDictMH = dict(zip(orthologs['Gene ID'], humanIds))

	prefix = "FDR."
	finalDict = dict.fromkeys(mouseDat_all.keys())

	for sheet, mouseDat in mouseDat_all.items():
		fdrCols = [col for col in mouseDat.columns if 'FDR' in col]
		fcCols = [col for col in mouseDat.columns if 'log2FoldChange' in col]

		# Remove only the literal prefix. str.lstrip("FDR.") would treat the
		# argument as a character set and also eat leading F/D/R/. characters
		# of the contrast name itself.
		keys = [col[len(prefix):] if col.startswith(prefix) else col for col in fdrCols]

		mouseUpGenes_human = dict.fromkeys(keys)
		mouseDownGenes_human = dict.fromkeys(keys)

		# NOTE(review): FDR and fold-change columns are matched by position,
		# which assumes both appear in the same contrast order - confirm.
		for key, fdr, fc in zip(keys, fdrCols, fcCols):
			significant = mouseDat[fdr] < fdrThresh
			upGenes = mouseDat[significant & (mouseDat[fc] > 0)]['Entrez']
			downGenes = mouseDat[significant & (mouseDat[fc] < 0)]['Entrez']

			# Convert mouse IDs to human IDs, skipping genes with no ortholog.
			mouseUpGenes_human[key] = [convDictMH[gene] for gene in upGenes if gene in convDictMH]
			mouseDownGenes_human[key] = [convDictMH[gene] for gene in downGenes if gene in convDictMH]

		# Stored inside the sheet loop so every sheet gets its own result.
		finalDict[sheet] = mouseUpGenes_human, mouseDownGenes_human

	return finalDict

def loadMouseTable():
	"""Build one wide table from all workbook sheets plus a mouse-human orthology graph.

	Every sheet is indexed by its ``Entrez`` column, its columns are suffixed
	with ``_<sheet name>``, and the sheets are outer-joined on that index.
	The graph gets one node per gene ID (attributes ``species`` and
	``symbol``) and one edge for every mouse/human pair that shares a
	``HomoloGene ID`` in the homology report.

	Returns:
		tuple ``(mouseTable, G)``: the joined DataFrame, with an extra
		``EntrezMouse`` column copied from the index, and the
		``networkx.Graph`` of ortholog pairs.

	Raises:
		IndexError: if the workbook contains no sheets.
	"""
	homedir = "/home/andrew/"
	# sheet_name=None returns a dict of all sheets; the older ``sheetname``
	# spelling is not accepted by current pandas.
	mouseDat_all = pd.read_excel(homedir + "paper1/YangSup/nn.4256-S3.xlsx", sheet_name = None)
	mouseDatList = []

	for sheet, sheetDat in mouseDat_all.items():
		mouseDat = sheetDat.copy()
		mouseDat.index = mouseDat['Entrez']
		# Suffix with the sheet name so columns from different sheets stay
		# distinct after the join.
		mouseDat.columns = [col + '_' + sheet for col in mouseDat.columns]
		mouseDatList.append(mouseDat)

	mouseTable = mouseDatList[0]
	for table in mouseDatList[1:]:
		mouseTable = mouseTable.join(table, how = 'outer')

	mgitab = pd.read_csv('/home/andrew/paper1/HOM_MouseHumanSequence.rpt', sep = '\t')

	G = nx.Graph()

	# Split by taxon once and group by homology class, instead of re-filtering
	# the whole table for every homology ID and again for every mouse row.
	mouseRows = mgitab[mgitab['NCBI Taxon ID'] == 10090]
	humanRows = mgitab[mgitab['NCBI Taxon ID'] == 9606]
	humanByHomid = {homid: group for homid, group in humanRows.groupby('HomoloGene ID')}

	for homid, mouseGroup in mouseRows.groupby('HomoloGene ID'):
		humanGroup = humanByHomid.get(homid)
		if humanGroup is None:
			# No human member in this class: nothing to connect, and the
			# mouse gene is not added as an isolated node.
			continue
		humanPairs = list(zip(humanGroup['EntrezGene ID'], humanGroup['Symbol']))
		for mouseid, mouseSymbol in zip(mouseGroup['EntrezGene ID'], mouseGroup['Symbol']):
			for humanid, humanSymbol in humanPairs:
				G.add_node(mouseid, species = "mouse", symbol = mouseSymbol)
				G.add_node(humanid, species = "human", symbol = humanSymbol)
				G.add_edge(mouseid, humanid)

	mouseTable['EntrezMouse'] = mouseTable.index

	return mouseTable, G


def loadR62(datfc=None, datfdr=None, mouse2Human=None):
	"""Merge the R6/2 fold-change and FDR tables and add human gene IDs.

	The two tables are inner-merged on ``Entrez.Gene``. That column is then
	reduced to the integer before the first ``///`` separator, and an
	``EntrezHuman`` column is added by looking each mouse ID up in the
	ortholog table (``Gene ID`` -> first ``;``-separated entry of
	``Gene ID.1``).

	Args:
		datfc: Optional fold-change DataFrame with an ``Entrez.Gene`` column.
			When None, the default space-separated file is read.
		datfdr: Optional FDR DataFrame with an ``Entrez.Gene`` column.
			When None, the default space-separated file is read.
		mouse2Human: Optional ortholog DataFrame with ``Gene ID`` and
			``Gene ID.1`` columns. When None, the default table is read.

	Returns:
		The merged DataFrame. ``EntrezHuman`` holds the human ID, or the
		string ``'NA'`` for mouse genes without an ortholog entry.
	"""
	if datfc is None:
		datfc = pd.read_csv("/home/andrew/paper1/mouseData/R62/datlogFCall.txt", sep = " ")
	if datfdr is None:
		datfdr = pd.read_csv("/home/andrew/paper1/mouseData/R62/datFDRall.txt", sep = " ")

	# merge() returns a new frame, so the column rewrites below never touch
	# the caller's inputs.
	datall = pd.merge(datfc, datfdr, on = 'Entrez.Gene')

	if mouse2Human is None:
		mouse2Human = pd.read_csv("/home/andrew/extData/hdinhd/orthoConv2.txt", sep = "\t")

	# The converted IDs are kept in their own Series instead of being written
	# back onto the dropna() result, which avoids pandas' chained-assignment
	# warning.
	orthologs = mouse2Human.dropna(subset = ['Gene ID.1'])
	humanIds = orthologs['Gene ID.1'].apply(lambda x: int(x.split(';')[0]))
	convDictMH = dict(zip(orthologs['Gene ID'], humanIds))

	# Keep only the first ID of a multi-gene entry.
	datall['Entrez.Gene'] = datall['Entrez.Gene'].apply(lambda gene: int(gene.split("///")[0]))
	# dict.get gives the 'NA' fallback for unknown IDs without a bare
	# ``except`` that would also hide unrelated errors.
	datall['EntrezHuman'] = datall['Entrez.Gene'].apply(lambda mouseid: convDictMH.get(mouseid, 'NA'))
	return datall


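# NOTE: everything inside the string literal below is disabled legacy code
# (an older loadConvTab), kept for reference only. It is never executed, and
# it cannot be re-enabled as written: it uses `mouseTable` and `G`, neither
# of which is defined inside it.
'''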
def loadConvTab():
	homedir = "/home/andrew/"

	mgitab = pd.read_csv('/home/andrew/paper1/HOM_MouseHumanSequence.rpt', sep = '\t')

	homids = set(mgitab['HomoloGene ID'])

	#G = nx.Graph()
	#outTab = pd.DataFrame(columns = ['human','mouse'])
	#j = 0

	outTab = pd.DataFrame(columns = ['mouse','human'])
	j = 0

	for i in homids:
		for row1 in mgitab[(mgitab['HomoloGene ID'] == i) & (mgitab['NCBI Taxon ID'] == 10090)].iterrows():
			mouseid = row1[1]['EntrezGene ID']
			mouseSymbol = row1[1]['Symbol']
			for row2 in mgitab[(mgitab['HomoloGene ID'] == i) & (mgitab['NCBI Taxon ID'] == 9606)].iterrows():
				humanid = row2[1]['EntrezGene ID']
				humanSymbol = row2[1]['Symbol']
				#G.add_node(mouseid, species = "mouse", symbol = mouseSymbol)
				#G.add_node(humanid, species = "human", symbol = humanSymbol)
				outTab.loc[j] = {'mouse' : mouseid, 'human' : humanid}
				j = j + 1


	outTab.to_csv('/home/andrew/paper1/ntvsfl/mousecomp/convtab.csv')
			#G.add_edge(mouseid,humanid)
			#outTab.loc[j] = pd.Series({'human' : humanid, 'mouse':mouseid})
			#j+=1

	### exporting graph as a table
	#outTab.to_csv('/home/andrew/paper1/ntvsfl/mouse2Human.csv')
	mgi_m = mgitab[mgitab['Common Organism Name'] == 'mouse, laboratory']
	mgi_h = mgitab[mgitab['Common Organism Name'] == 'human']

	mgijoin = mgi_m.join(other = mgi_h, on = 'HomoloGene ID', how = 'outer', lsuffix = '_mouse', rsuffix = '_human')

	
	mouse2Human = pd.read_csv("/home/andrew/extData/hdinhd/orthoConv2.txt",sep = "\t")
	mouse2Human2 = mouse2Human.dropna(subset = ['Gene ID.1'])
	mouse2Human2['human'] = mouse2Human2['Gene ID.1'].apply(lambda x: int(x.split(';')[0]))
	
	convDictMH = dict(zip(mgijoin['EntrezGene ID_mouse'],mgijoin['EntrezGene ID_human']))

	def convToHuman(mouseGene):
		try:
			return convDictMH[mouseGene]
		except:
			return 'NA'
	mouseTable['EntrezMouse'] = mouseTable.index
	#mouseTable['EntrezHuman'] = mouseTable['EntrezMouse'].apply(convToHuman)

	#contQ = ['continuous' not in col for col in mouseTable.columns]

	return mouseTable, G
'''